'''
Michael D. Young
'''


import pandas as pd
import sys

# Make the shared utilities folder importable for this script.
Utilities_dir = r"C:\Michael\Albany LAIO Lab and Research Projects\Python Utilities"
sys.path.append(Utilities_dir)


# ---------------------------------------------------------------------------
# Flags
# ---------------------------------------------------------------------------

# Autoscale flag: if True, use autoscale on figures.
# (Not referenced in the visible part of this script.)
autoscale = True

# If True, save images to disk.
# (Not referenced in the visible part of this script.)
save_images = True


# ---------------------------------------------------------------------------
# Directories and input data
# ---------------------------------------------------------------------------

# Folder for generated images.
Image_dir = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/images/"

# Folder the Monte Carlo results are written to.
Data_dir = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/data/"

# Workbook with one row of scores per document.
Document_Data_path = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/input data/TALID Version 1 Scores_by_Document.xlsx"
Document_Data = pd.read_excel(Document_Data_path)

# Workbook with one row of scores per leader; used as the reference means.
Leader_Means_path = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/input data/TALID Version 1 Scores_by_Leader.xlsx"
Leader_Means_DF = pd.read_excel(Leader_Means_path)


#print(Document_Data)


# Collects one result DataFrame per processed batch; concatenated at the end.
Cummulative_Dataframe_list = []

# Count the non-null entries of every column per author and turn the author
# back into a regular column so it can be selected by name.
Document_counts = Document_Data.groupby(['FirstOfAuthor']).count().reset_index()

# Keep only authors with more than 150 files (strictly greater than 150).
has_enough_documents = Document_counts['Filename'] > 150
Temp_DF = Document_counts[has_enough_documents]

N_Leaders = len(Temp_DF)
print("Number of leaders with > 150 documents", "", N_Leaders)

# Names of the leaders that will be resampled.
unique_values = Temp_DF['FirstOfAuthor'].unique()
# For debugging on a small subset, override the list, e.g.:
# unique_values=['Ahern_Bertie','Arroyo-Macapagal']

# 1-based progress counter for the per-leader loop.
counter = 1
        
# ---------------------------------------------------------------------------
# Monte Carlo resampling: for each leader, draw random sets of documents,
# score every draw and express the score as a distance from the leader mean.
# ---------------------------------------------------------------------------

# Number of documents per draw that is simulated for every leader.
SAMPLE_SIZES = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 30, 40, 50, 60]

# Number of random draws per (leader, sample size) combination.
N_DRAWS = 10000

# Per-document columns that are summed over the documents of a draw.
COUNT_COLUMNS = ['Word_Count', 'IC', 'EC', 'HC', 'LC', 'HD', 'LD', 'HB', 'LB',
                 'HP', 'LP', 'HS', 'LS', 'HT', 'LT', 'nACH', 'nAFF', 'nPWR']

# Attribution columns that are added together per document before scoring.
ATTRIBUTION_GROUPS = {
    'self_positive': ['self appeal', 'self promise', 'self reward'],
    'self_negative': ['self punish', 'self threaten', 'self oppose'],
    'other_positive': ['other appeal', 'other promise', 'other reward'],
    'other_negative': ['other punish', 'other threaten', 'other oppose'],
}

# score = high / (high + low)
RATIO_SCORES = {
    'BACE': ('IC', 'EC'),
    'CC': ('HC', 'LC'),
    'DIS': ('HD', 'LD'),
    'IGB': ('HB', 'LB'),
    'PWR': ('HP', 'LP'),
    'SC': ('HS', 'LS'),
    'TASK': ('HT', 'LT'),
}

# score = (count / words) * 100
MOTIVE_SCORES = {'nACH-100': 'nACH', 'nAFF-100': 'nAFF', 'nPWR-100': 'nPWR'}

# score = (positive - negative) / (positive + negative)
BALANCE_SCORES = {
    'I1': ('self_positive', 'self_negative'),
    'P1': ('other_positive', 'other_negative'),
}

# Columns that are converted to a distance from the leader mean.
SCORE_COLUMNS = list(RATIO_SCORES) + list(MOTIVE_SCORES) + list(BALANCE_SCORES)


def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is 0.

    The explicit check is used instead of catching ZeroDivisionError because
    numpy scalars do not raise on division by zero (they yield nan/inf).
    """
    if denominator == 0:
        return float('nan')
    return numerator / denominator


def _per_document_counts(author_rows):
    """Return the per-document counts needed to score a draw.

    The result holds COUNT_COLUMNS plus one column per entry of
    ATTRIBUTION_GROUPS (the row-wise sum of its source columns). Computing
    this once per leader keeps the work out of the draw loop.
    """
    counts = author_rows[COUNT_COLUMNS].copy()
    for target, sources in ATTRIBUTION_GROUPS.items():
        # skipna=False: a missing value propagates as NaN, as plain addition would.
        counts[target] = author_rows[sources].sum(axis=1, skipna=False)
    return counts


def _score_draw(totals):
    """Compute the scores of one draw from its summed counts.

    totals maps every column of _per_document_counts() to its sum over the
    documents in the draw. Returns a dict with 'Words' and, for every score,
    the score itself and '<score>-observations'. A score whose denominator
    is 0 is NaN.
    """
    words = totals['Word_Count']
    row = {'Words': words}

    for score, (high, low) in RATIO_SCORES.items():
        observations = totals[high] + totals[low]
        row[score] = _ratio(totals[high], observations)
        row[score + '-observations'] = observations

    for score, count in MOTIVE_SCORES.items():
        row[score] = _ratio(totals[count], words) * 100
        row[score + '-observations'] = totals[count]

    for score, (positive, negative) in BALANCE_SCORES.items():
        observations = totals[positive] + totals[negative]
        row[score] = _ratio(totals[positive] - totals[negative], observations)
        row[score + '-observations'] = observations

    return row


def run_monte_carlo(document_data, leader_means, authors,
                    sample_sizes=None, n_draws=N_DRAWS):
    """Resample documents per leader and return score deviations per draw.

    Parameters
    ----------
    document_data : DataFrame with one row per document. Must contain
        'FirstOfAuthor', COUNT_COLUMNS and the columns listed in
        ATTRIBUTION_GROUPS.
    leader_means : DataFrame with an 'Author' column and one column per
        entry of SCORE_COLUMNS; the first row matching an author is used.
    authors : iterable of author names to process.
    sample_sizes : iterable of documents-per-draw values
        (default: SAMPLE_SIZES).
    n_draws : number of random draws per (author, sample size).

    Returns
    -------
    DataFrame with one row per draw: 'Author', 'Sample' (draw index),
    'Words', every score as (draw score - leader mean) together with its
    '<score>-observations' count, and 'Sample_size'.

    Raises
    ------
    IndexError if an author has no row in leader_means.
    ValueError (from pandas) if a sample size exceeds the number of
    documents of an author, or if there is nothing to concatenate.
    """
    if sample_sizes is None:
        sample_sizes = SAMPLE_SIZES
    authors = list(authors)

    frames = []
    for position, name in enumerate(authors, start=1):
        print(str(position)+" of "+str(len(authors))+" leaders")

        # Look the leader up before the expensive draws so a missing leader
        # fails immediately and with a readable message.
        leader_rows = leader_means[leader_means['Author'] == name]
        if leader_rows.empty:
            raise IndexError("No row for author " + repr(name)
                             + " in the leader means table")
        leader_mean = leader_rows.iloc[0]

        per_document = _per_document_counts(
            document_data[document_data['FirstOfAuthor'] == name])

        for sample_size in sample_sizes:
            print(name+" "+str(sample_size)+" docs")

            # Rows are collected per sample size. Collecting them per leader
            # and exporting after every sample size would emit the draws of
            # all earlier sample sizes again.
            rows = []
            for draw in range(n_draws):
                # Draw without replacement and add the counts of the drawn
                # documents; skipna=False keeps NaN propagation.
                totals = per_document.sample(
                    n=sample_size, replace=False).sum(skipna=False)

                row = {'Author': name, 'Sample': draw}
                row.update(_score_draw(totals))
                # Appended last so the positions of the other columns stay
                # the same; without it draws of different sizes cannot be
                # told apart in the output.
                row['Sample_size'] = sample_size
                rows.append(row)

            if not rows:
                continue

            frame = pd.DataFrame.from_records(rows)

            # Convert the estimates to a distance from the leader mean.
            for column in SCORE_COLUMNS:
                frame[column] = frame[column].astype(float) - leader_mean[column]

            frames.append(frame)

    return pd.concat(frames)


# Only run the (long) simulation when the file is executed as a script, so
# the functions above can be imported without side effects.
if __name__ == "__main__":
    Cummulative_Data_DF = run_monte_carlo(Document_Data, Leader_Means_DF,
                                          unique_values)

    Cummulative_Data_DF.to_csv(Data_dir+"Cumulative monte carlo.csv", index=False)

    print("Clean exit")
       